R Markdown

#Loading Packages
#Will most likely add more
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(bulletxtrctr)
## Warning in rgl.init(initValue, onlyNULL): RGL: unable to open X11 display
## Warning: 'rgl_init' failed, running with rgl.useNULL = TRUE
library(x3ptools)
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
library(readr)
library(furrr)
## Loading required package: future
library(stringr)
library(dichromat)
library(future)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidyr)
options(future.globals.maxSize = 4*1024*1024*1024)
#data_dir <- "/media/Raven/REU_Refit"
load("Data_Needed.RData")

Reading in Hamby Data

# Reading in Hamby_173
df <- tibble(path = list.files(path = "Hamby_173", 
                               pattern = ".x3p", recursive = T, 
                               full.names = T)) %>% 
  mutate(Barrel = str_extract(path, "(Unknown|Barrel)\\d{0,2}") %>% 
           str_remove("Barrel"), 
         Bullet = str_extract(path, "Bullet[12A-Z]") %>% 
           str_remove("Bullet"),
         Land = str_extract(path, "land\\d{1}") %>% 
           str_remove("land")) %>% 
  mutate(Set = "Hamby173") %>%
  mutate(x3p = map(path, read_x3p)) %>%
  mutate(x3p = map(x3p, ~x3p_m_to_mum(.) %>% y_flip_x3p()))

# Reading in Hamby_252
df2 <- tibble(path = list.files(path = "Hamby_252", 
                                pattern = ".x3p", recursive = T, 
                                full.names = T)) %>% 
  mutate(Barrel = str_extract(path, "(Unknown|Barrel)\\d{0,2}") %>% 
           str_remove("Barrel"), 
         Bullet = str_extract(path, "Bullet[12A-Z]") %>% 
           str_remove("Bullet"), 
         Land = str_extract(path, "Bullet [12A-Z]-[123456]") %>% 
           str_remove("Bullet [12A-Z]-")) %>% 
  mutate(Set = "Hamby252") %>%
  mutate(x3p = map(path,read_x3p))  %>%
  # Adjust orientation
  mutate(x3p = map(x3p, ~x3p_m_to_mum(.) %>% rotate_x3p(angle = -90) %>% y_flip_x3p()))
# One big data set - easier to debug the code if you do everything in one big go.
hamby <- bind_rows(df, df2) 
hamby <- hamby %>%
  mutate(id = paste(Set, Barrel, Bullet, Land, sep = "-")) %>%
  select(id, Set, Barrel, Bullet, Land, x3p, path)
rm(df, df2)

Cross Sections

plan(multicore) # use all the cores at once
hamby <- hamby %>%
  mutate(
    CrossSection = map_dbl(x3p, x3p_crosscut_optimize, minccf = 0.9, span = 0.3, percent_missing = 25))

head(select(hamby, -path, -x3p), 5)
#hamby %>% arrange(desc(CrossSection))
plot1 = hamby %>% 
  filter(Barrel != "Unknown") %>% 
    ggplot(aes(x = Barrel, y = CrossSection, fill = Bullet))+ 
      geom_boxplot()+
          ggtitle("Barrels 1-10")

plot2 = hamby %>% 
  filter(Barrel == "Unknown") %>% 
    ggplot(aes(x = Barrel, y = CrossSection, fill = Bullet))+ 
      geom_boxplot()+
          ggtitle("Barrel Unknown")

grid.arrange(plot1, plot2)

Looking at the Boxplot for Barrels 1-10, we can see the inter-quartile ranges seem to be roughly the same for each Bullet. For Barrels 1-10, Bullet 2 seems to have higher values than Bullet 1. Barrels 6 and 7 have the largest whiskers. Barrel 1, Bullet 1 has the smallest inter-quartile range and Barrel 9, Bullet 2 has the largest interquartile range. There is one apparent outlier for Barrel 10, Bullet 1. This CrossSection Value is not high enough for concern when looking at all Barrels but should be examined separately.

Looking at the Boxplot for Barrel “Unknown” we can see an apparent outlier for Bullet B. This CrossSection value is 400. Unlike with Barrels 1-10 we can see the Bullets for Barrel “Unknown” have noticeably more lower whiskers.

#Cross Sections
hamby <- hamby %>% 
  mutate(CrossCut = map2(.x = x3p, .y = CrossSection, .f = x3p_crosscut))

crosscuts <- select(hamby, -path, -x3p) %>% 
      tidyr::unnest(CrossCut)

crosscuts %>% 
  arrange(desc(CrossSection))

Note: For Hamby173 barrels 1, 2, and 4 for bullet 1 seem to have higher crosscut values plotted. For Hamby173 barrel 1, bullet 2 seems to have a higher crosscut values plotted. There doesn’t seem to be any noticeably low values for Hamby173.

Note: For Hamby 252 barrel 10 and 2, bullet 1 has high crosscut values plotted. There also seems to be some low crosscut values plotted as well.

#Looking at CrossCuts to see any issues

crosscuts %>%
  filter(Barrel != "Unknown" & Set == "Hamby173") %>% 
    ggplot(data = ., aes(x = x, y = value, color = Land)) + 
      geom_line() + 
        facet_grid(Set + paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))

ggsave("CrossCutPlot1.png")
## Saving 7 x 5 in image
crosscuts %>%
  filter(Barrel != "Unknown" & Set == "Hamby252") %>% 
    ggplot(data = ., aes(x = x, y = value, color = Land)) + 
      geom_line() + 
        facet_grid(Set + paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))

ggsave("CrossCutPlot2.png")
## Saving 7 x 5 in image
crosscuts %>% 
  filter(Set == "Hamby173" & Barrel == "Unknown") %>%
    ggplot(aes(x = x, y = value)) + 
      geom_line() +
        facet_grid(Bullet~Land, labeller="label_both") +
          theme_bw()+ ggtitle("Barrel Unknown for Hamby173")

crosscuts %>% 
  filter(Set == "Hamby252" & Barrel == "Unknown") %>%
    ggplot(aes(x = x, y = value)) + 
      geom_line() +
        facet_grid(Bullet~Land, labeller="label_both") +
          theme_bw()+ ggtitle("Barrel Unknown for Hamby252")

crosscuts %>% filter(id == "Hamby252-Unknown-B-2") %>% 
  ggplot(aes(x = x, y = value))+
    geom_line()+
      facet_grid(Bullet~Land + Set, labeller = "label_both")+
        theme_bw() + ggtitle("High CrossSection Bullet B")

Looking at the CrossCuts we can see the side profile for a bullets land impression for every barrel in Hamby173 and Hamby252. For most of the CrossCuts the shoulders can be clearly identified but there are some that can not be “clearly” identified due to possbile tank rash, pitting, or scans that do not meet the standards that forensic analysis require. I can not say for certain if there are scans that should be excluded. I used FIX3P to examine the “groove to groove” scans and noticed considerable tank rash on some of the scans. There was some noticeable pitting as well but I only felt it was a concern when the pitting was concentrated in the center of the “groove to groove” scan.

When looking at the Crosscuts we can see most have noticeable curvature. There are some scans that do not and these scans can look like a flat line or even a traingle… I Have made note of these particular scans as well.

Groove Loader

#Grooves
saved_grooves_location <- "V2H173_H252_Grooves_data.rda"
if (file.exists(saved_grooves_location)) {
  hamby$Grooves <- readRDS(saved_grooves_location)
} else {
  hamby <- hamby %>% 
    mutate(Grooves = CrossCut %>% 
             map(.f = cc_locate_grooves, 
                        method = "rollapply", smoothfactor = 15, return_plot = T))  # use plot so that the shiny app works...
}

grooves <- hamby %>% tidyr::unnest(Grooves)

head(grooves, 10)

head(select(hamby, -path, -x3p, -CrossCut), 5)

Examining Groove Cut Offs

Hamby_test <- hamby %>% 
  filter(Set == "Hamby252") %>% 
    filter(Barrel == 6)

Hamby_test2 <- hamby %>% 
  filter(Set == "Hamby252") %>% 
    filter(Barrel == 3 & Bullet == 1)

Hamby_test3 <- hamby %>% 
  filter(Set == "Hamby252") %>% 
    filter(Barrel == 1 & Bullet == 1)

Hamby_test4 <- hamby %>% 
  filter(Set == "Hamby252") %>% 
    filter(Barrel == 9 & Bullet == 2)

Hamby_test5 <- hamby %>% 
  filter(Set == "Hamby252") %>% 
    filter(Barrel == "Unknown") %>% 
      filter(Bullet == "B" | Bullet == "S" | Bullet == "U")

Hamby_test6 <- hamby %>% 
  filter(Set == "Hamby173") %>% 
    filter(Barrel == 3 & Bullet == 2)

Hamby_test7 <- hamby %>% 
  filter(Set == "Hamby173") %>% 
    filter(Barrel == "Unknown") %>% 
      filter(Bullet == "B" | Bullet == "E" | Bullet == "U")


gridExtra::grid.arrange(Hamby_test$Grooves[[1]]$plot,
                        Hamby_test$Grooves[[7]]$plot,
                         Hamby_test2$Grooves[[4]]$plot,
                         Hamby_test3$Grooves[[6]]$plot,
                         Hamby_test4$Grooves[[4]]$plot,
                         Hamby_test5$Grooves[[2]]$plot,
                         Hamby_test5$Grooves[[10]]$plot,
                        nrow = 2)
                        
                        

                        
gridExtra::grid.arrange(Hamby_test6$Grooves[[1]]$plot,
                        Hamby_test7$Grooves[[3]]$plot,
                        Hamby_test7$Grooves[[15]]$plot,
                        Hamby_test7$Grooves[[12]]$plot,
                        nrow = 2)

rm(Hamby_test, Hamby_test2, Hamby_test3, Hamby_test4, Hamby_test5, Hamby_test6, Hamby_test7 )

When run in interactive mode, a shiny app can be used to set smarter grooves.

Groove Identifications by: Andrew Maloney

Using Shiny App

library(shiny)
if (file.exists(saved_grooves_location)) {
  hamby$Grooves <- readRDS(saved_grooves_location)
}
if (interactive()) { # only run when you're manually running chunks... don't run when the whole document is compiled.
  shinyApp(
    ui = fluidPage(
      selectInput("k", "Investigate kth plot:",
                  selected = 1,
                  choices = (1:length(hamby$Grooves)) %>% set_names(hamby$id)
      ),
      textOutput("groovelocations"),
      actionButton("confirm", "Confirm"),
      actionButton("save", "Save"),
      plotOutput("groovePlot", click = "plot_click"),
      verbatimTextOutput("info")
    ),
    
    server = function(input, output, session) {
      output$groovePlot <- renderPlot({
        k <- as.numeric(input$k)
        p <- hamby$Grooves[[k]]$plot
        
        p
      })
      output$groovelocations <- renderText({
        paste(
          "Left Groove: ", hamby$Grooves[[as.numeric(input$k)]]$groove[1],
          " Right Groove: ", hamby$Grooves[[as.numeric(input$k)]]$groove[2]
        )
      })
      observeEvent(input$confirm, {
        cat(paste(hamby$id[as.numeric(input$k)], "\n"))
        updateSelectInput(session, "k", "Investigate kth plot:",
                          selected = as.numeric(input$k) + 1,
                          choices = (1:length(hamby$Grooves)) %>% set_names(hamby$id)
        )
      })
      observeEvent(input$save, {
        saveRDS(hamby$Grooves, file = saved_grooves_location)
        message("groove data saved\n")
      })
      
      observeEvent(input$plot_click, {
        k <- as.numeric(input$k)
        xloc <- input$plot_click$x
        
        gr <- hamby$Grooves[[k]]$groove
        if (abs(gr[1] - xloc) < abs(gr[2] - xloc)) {
          hamby$Grooves[[k]]$groove[1] <<- xloc
        } else {
          hamby$Grooves[[k]]$groove[2] <<- xloc
        }
        output$groovePlot <- renderPlot({
          k <- as.numeric(input$k)
          p <- hamby$Grooves[[k]]$plot +
            geom_vline(xintercept = hamby$Grooves[[k]]$groove[1], colour = "green") +
            geom_vline(xintercept = hamby$Grooves[[k]]$groove[2], colour = "green")
          
          p
        })
      })
      output$info <- renderText({
        paste0("x=", input$plot_click$x, "\ny=", input$plot_click$y)
      })
    },
    options = list(height = 500)
  )
  saveRDS(hamby$Grooves, file = saved_grooves_location)
} else {
  if (!file.exists(saved_grooves_location)) {
    message("run script in interactive mode to fix grooves")
  } else {
    hamby$Grooves <- readRDS(saved_grooves_location)
  }
}


#Test Some Grooves(performed:06/12/19)
#Signatures
hamby <- hamby %>% 
  mutate(Signatures = map2(.x = CrossCut, .y = Grooves, .f = cc_get_signature, span = 0.75, span2 = .03)) 

Signatures <- hamby %>% 
  select(id, Set, Barrel, Bullet, Land, Signatures) %>% 
    tidyr::unnest()
#Looking for major issues - grooves not set correctly (large deviations at beginning or end of a line)

Signatures %>%
  filter(Barrel != "Unknown") %>% 
ggplot(data = ., aes(x = x, y = sig, color = Land)) + 
  geom_line()+
  facet_grid(Set + paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel))+
  ggtitle("Signatures Barrels 1-10")
## Warning: Removed 2733 rows containing missing values (geom_path).

# put the barrels in the right order
ggsave("Signature_Plot1.png")
## Saving 7 x 5 in image
## Warning: Removed 2733 rows containing missing values (geom_path).
Signatures %>%
  filter(Set == "Hamby173") %>%
  filter(Barrel == "Unknown") %>% 
ggplot(data = ., aes(x = x, y = sig, color = Land)) + 
  geom_line()+
  facet_wrap(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel), ncol = 4)+
    ggtitle("Barrels Unknown for Hamby 173")
## Warning: Removed 1682 rows containing missing values (geom_path).

ggsave("Signature_Plot2.png")
## Saving 7 x 5 in image
## Warning: Removed 1682 rows containing missing values (geom_path).
Signatures %>%
  filter(Set == "Hamby252") %>%
  filter(Barrel == "Unknown") %>% 
ggplot(data = ., aes(x = x, y = sig, color = Land)) + 
  geom_line()+
  facet_wrap(paste("Bullet", Bullet) ~ sprintf("Barrel %02s", Barrel), ncol = 4)+
    ggtitle("Barrels Unknown for Hamby 252")
## Warning: Removed 1550 rows containing missing values (geom_path).

ggsave("Signature_Plot3.png")
## Saving 7 x 5 in image
## Warning: Removed 1550 rows containing missing values (geom_path).

Examining the Signature Profiles we can see for barrels 1-10, for both Hamby173 + Hamby252, that the signatures for lands 1-6 are aligned pretty well. There seems to be some noticeable deviations at the beginning and end for Barrels 1-10 in both sets. I looked at the groove to groove scans using FIX3P and these spikes at the beginning and end for Barrels 1 and 10 can be identified. Groove locations seem to be set correctly for these lands.

There is some noticeable missing data for Signature comparisons [Hamby252/Bullet1] when filling by Bullet. This is hard to see when filling by the Lands.

We also examine the number of consecutively matching peaks in signatures for two aligned lands.(Case Validation Paper)

Groove Identifications Approved

comparisons <- crossing(Bullet1 = hamby$id, Bullet2 = hamby$id) %>%
  left_join(nest(hamby, -id) %>% magrittr::set_names(c("Bullet1", "Bullet1_data"))) %>%
  left_join(nest(hamby, -id) %>% magrittr::set_names(c("Bullet2", "Bullet2_data"))) %>%
  mutate(Set1 = str_extract(Bullet1, "Hamby\\d{2,3}"),
         Set2 = str_extract(Bullet2, "Hamby\\d{2,3}")) %>%
  filter(Set1 == Set2) %>% # Get rid of cross-set comparisons for now...
  select(-matches("Set"))
#plan(multicore(workers = availableCores(constraints = 8)))

get_sig <- function(data) {
  map(data$Signatures, "sig")
}
comparisons <- comparisons %>%
  mutate(sig1 = map(Bullet1_data, get_sig), sig2 = purrr::map(Bullet2_data, get_sig))

plan(multicore)

comparisons <- comparisons %>%
  mutate(Aligned = map2(sig1, sig2, ~sig_align(unlist(.x), unlist(.y)))) # Getting Aligned signatures

# Get striae
comparisons <- comparisons %>%
  mutate(Striae = map(Aligned, sig_cms_max)) # Obtaining Striae

saveRDS(select(comparisons, -Bullet1_data, -Bullet2_data), file = "Hamby_173_252_Comparisons.rda")

Extracting features

comparisons <- comparisons %>% 
  select(-Bullet1_data, -Bullet2_data)

comparisons <- comparisons %>% 
  mutate(features = map2(.x = Aligned, .y = Striae, .f = extract_features_all, resolution = 1.5625)) #ObtainingFeatures

comparisons <- comparisons %>% 
  mutate(Legacy_Features = map(Striae, extract_features_all_legacy, resolution = 1.5625)) # Obtaining feature leacy

comparisons <- comparisons %>% 
  tidyr::unnest(Legacy_Features) # Extracting feature legacy

Creating Columns for organization and for future plotting purposes.

comparisons <- comparisons %>% 
  mutate(Set = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\1", Bullet2)) # Creating columns from IDs
comparisons <- comparisons %>% 
  mutate(BarrelA = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\2", Bullet2))
comparisons <- comparisons %>% 
  mutate(BarrelB = gsub("(Hamby173|Hamby252)-([0-9]{0,2}|Unknown)-([1-2A-Z])-([1-6])", "\\2", Bullet1))
comparisons <- comparisons %>% 
  mutate(BulletA = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\3", Bullet2))
comparisons <- comparisons %>% 
  mutate(BulletB = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\3", Bullet1))
comparisons <- comparisons %>% 
  mutate(LandA = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\4", Bullet2))
comparisons <- comparisons %>% 
  mutate(LandB = gsub("(Hamby173|Hamby252)-([0-9]{1,2}|Unknown)-([1-2A-Z])-([1-6])", "\\4", Bullet1))
head(comparisons, 500)
## # A tibble: 500 x 21
##    Bullet1 Bullet2   ccf rough_cor    lag       D    sd_D signature_length
##    <chr>   <chr>   <dbl>     <dbl>  <dbl>   <dbl>   <dbl>            <dbl>
##  1 Hamby1… Hamby1… 1         1      0     0       0.00489             1.64
##  2 Hamby1… Hamby1… 0.350     0.350  0.116 0.00259 0.00479             2.02
##  3 Hamby1… Hamby1… 0.519     0.519 -0.164 0.00186 0.00348             1.88
##  4 Hamby1… Hamby1… 0.220     0.220  0.146 0.00295 0.00478             2.09
##  5 Hamby1… Hamby1… 0.292     0.292  0.035 0.00250 0.00442             1.80
##  6 Hamby1… Hamby1… 0.248     0.248  0     0.00284 0.00458             1.78
##  7 Hamby1… Hamby1… 0.248     0.248 -0.088 0.00222 0.00377             1.76
##  8 Hamby1… Hamby1… 0.287     0.287  0.115 0.00233 0.00425             1.94
##  9 Hamby1… Hamby1… 0.258     0.258 -0.113 0.00438 0.00683             1.66
## 10 Hamby1… Hamby1… 0.271     0.271  0.159 0.00246 0.00370             1.89
## # … with 490 more rows, and 13 more variables: overlap <dbl>,
## #   matches <dbl>, mismatches <dbl>, cms <dbl>, non_cms <dbl>,
## #   sum_peaks <dbl>, Set <chr>, BarrelA <chr>, BarrelB <chr>,
## #   BulletA <chr>, BulletB <chr>, LandA <chr>, LandB <chr>

Features 2017 Data

features_2017 <- read_csv("features-hamby.csv") #Reading in csv
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   match = col_logical(),
##   study1 = col_character(),
##   study2 = col_character(),
##   barrel2 = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 31240 parsing failures.
##   row     col expected actual                 file
## 90507 barrel1 a double      A 'features-hamby.csv'
## 90508 barrel1 a double      A 'features-hamby.csv'
## 90509 barrel1 a double      A 'features-hamby.csv'
## 90510 barrel1 a double      A 'features-hamby.csv'
## 90511 barrel1 a double      A 'features-hamby.csv'
## ..... ....... ........ ...... ....................
## See problems(...) for more details.
features_2017 <- features_2017 %>%
  select(-land1_id, -land2_id) %>% # removing
    filter(study1 != "Cary" & study2 != "Cary") %>%
        filter(study1 == study2) %>%
          select(-study2) %>%
      rename(BarrelB = barrel1, BulletB = bullet1, LandB = land1) %>% # Changed column names
      rename(BarrelA = barrel2, BulletA = bullet2, LandA = land2) %>% # Changed column names
          mutate(study1 = gsub("Hamby44", "Hamby173", study1)) %>% #Chnaging observation name
            mutate(study1 = factor(study1, levels = c("Hamby173", "Hamby252"))) %>% # for ordering principles
      rename(ccf_2017 = ccf, rough_cor_2017 = rough_cor, lag_2017 = lag, D_2017 = D, sd_D_2017 = sd_D, signature_length_2017 = signature_length, overlap_2017 = overlap, matches_2017 = matches, mismatches_2017 = mismatches, cms_2017 = cms, non_cms_2017 = non_cms, sum_peaks_2017 = sum_peaks) # Column names changed for comparing purposes 


#Exploring we see that all lettered Barrels only have bullet equal to 1. No need to worry about a lettered barrel having a bullet 2
# Code will not look like this forever... Duct Tape... Will Dplyr soon...

features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "A"] <- "A") #Assign A to Bullets with column "BarrelA" equalequal to "A"
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "B"] <- "B")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "C"] <- "C")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "D"] <- "D")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "E"] <- "E")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "F"] <- "F")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "G"] <- "G")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "H"] <- "H")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "I"] <- "I")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "J"] <- "J")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "L"] <- "L")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "M"] <- "M")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "N"] <- "N")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "Q"] <- "Q")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "R"] <- "R")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "S"] <- "S")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "U"] <- "U")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "V"] <- "V")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "W"] <- "W")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "X"] <- "X")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "Y"] <- "Y")
features_2017 <- within(features_2017, BulletA[BulletA == 1 & BarrelA == "Z"] <- "Z")



features_2017$BarrelA[features_2017$BarrelA == "A"] <- "Unknown" #Group lettered Barrels together into Barrel "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "B"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "C"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "D"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "E"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "F"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "G"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "H"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "I"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "J"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "L"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "M"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "N"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "Q"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "R"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "S"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "U"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "V"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "W"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "X"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "Y"] <- "Unknown"
features_2017$BarrelA[features_2017$BarrelA == "Z"] <- "Unknown"

features_2017 <- features_2017 %>% 
  mutate(Bullet1 = paste(study1, BarrelB, BulletB, LandB, sep = "-"),
         Bullet2 = paste(study1, BarrelA, BulletA, LandA, sep = "-"))# Creating ID similar to Heike's 

features_2017 <- features_2017[order(features_2017$study1), ]
#Ordered Set column so that all 173 observations came before 252 observations
# At first glance Hamby173 Barrel 10, Bullet 1, Land 1 seems to be missing

features_2017 <- features_2017 %>% 
  mutate(BarrelB = as.character(BarrelB), 
         BulletA = as.character(BulletA), 
         BulletB = as.character(BulletB), 
         LandA = as.character(LandA), 
         LandB = as.character(LandB))

Comparing 2017 and 2019 after basic cleaning

table(comparisons$BarrelA)
## 
##       1      10       2       3       4       5       6       7       8 
##    5040    5040    5040    5040    5040    5040    5040    5040    5040 
##       9 Unknown 
##    5040   37800
table(comparisons$BulletA)
## 
##     1     2     A     B     C     D     E     F     G     H     I     J 
## 25200 25200  1260  2520  2520  1260  2520  1260  1260  2520  1260  1260 
##     L     M     N     Q     R     S     U     V     W     X     Y     Z 
##  2520  2520  1260  1260  1260  1260  2520  1260  1260  2520  1260  1260
table(comparisons$LandA)
## 
##     1     2     3     4     5     6 
## 14700 14700 14700 14700 14700 14700
#----------------------------------------------------------------#

table(features_2017$BarrelA)
## 
##       1      10       2       3       4       5       6       7       8 
##     420     132     708     949    1214    1548    1753    2112    2400 
##       9 Unknown 
##    2570   28425
table(features_2017$BulletA)
## 
##    1    2    A    B    C    D    E    F    G    H    I    J    L    M    N 
## 6584 7222  723 1359 1548  789 1656  861  867 1800  939  933 1944 1845 1041 
##    Q    R    S    U    V    W    X    Y    Z 
##  865 1077 1071 2033 1149 1185 2358 1173 1209
table(features_2017$LandA)
## 
##    1    2    3    4    5    6 
## 6850 6863 7158 6700 7295 7365

Heat Maps

library(viridis)
## Loading required package: viridisLite
comparisons %>%                          #Comparison heatmap for 2019 data
  filter(!is.na(BarrelA)) %>% 
    filter(!is.na(BarrelB)) %>% 
      group_by(BarrelA, BarrelB, Set) %>% 
        summarise(count = n()) %>% 
  ggplot(aes(x = BarrelA, y = BarrelB))+
    geom_tile(aes(fill = count))+
      scale_fill_viridis(option = "plasma", direction = -1)+ 
        geom_text(aes(label = count))+ 
          ggtitle("Comparison Tiles")+ 
            theme_bw()+ 
              facet_grid(~Set) #Good way to compare graphs below.  Shows how bad "features-hamby.csv" really is. 

comparisons %>% 
  filter(!is.na(BarrelA)) %>% 
    filter(!is.na(BarrelB)) %>% 
      group_by(BarrelA, BarrelB, Set) %>% summarise(count = max(signature_length)) %>% 
  ggplot(aes(x = BarrelA, y = BarrelB))+
    geom_tile(aes(fill = count))+
      scale_fill_viridis(option = "plasma", direction = -1)+ 
        geom_text(aes(label = count))+ 
          ggtitle("Comparison Signature_Length Tiles")+ 
            theme_bw()+
              facet_grid(~Set)

features_2017 %>%                        # features_2017 plot 1
  filter(!is.na(BarrelA)) %>% 
    filter(!is.na(BarrelB)) %>% 
      group_by(BarrelA, BarrelB, study1) %>% 
        summarise(count = n()) %>% 
  ggplot(aes(x = BarrelA, y = BarrelB))+
    geom_tile(aes(fill = count))+
      scale_fill_viridis(option = "plasma", direction = -1)+ 
        geom_text(aes(label = count))+ 
          ggtitle("Comparison Count Version 1")+ 
            theme_bw()+ theme(axis.text.x = element_text(angle = 10))+
              facet_grid(~study1) #Very Promising Results.  This geom_tile/heat_map shows my analysis in visual format.

features_2017 %>%                       # features_2017 plot 2
 filter(!is.na(BarrelA)) %>%
   filter(!is.na(BarrelB)) %>%
     group_by(BarrelA, BarrelB, Bullet1, Bullet2, study1) %>%
      arrange(desc(signature_length_2017)) %>% 
       filter(row_number() == 1) %>% 
                ungroup() %>% 
  group_by(BarrelA, BarrelB, study1) %>%
    summarise(count = n()) %>% 
      ggplot(aes(x = BarrelA, y = BarrelB))+
       geom_tile(aes(fill = count))+
        scale_fill_viridis(option = "plasma", direction = -1)+
          geom_text(aes(label = count))+
            ggtitle("Comparison Count Version 2")+
              theme_bw()+ theme(axis.text.x = element_text(angle = 10))+
                 facet_grid(~study1)

We shall use the comparison plot from our 2019 data to help compare our data from 2017.

Looking at features_2017 plot2 we can see that there is missing data on the Barrel-Bullet-Land level.

Finding Missing Values

# Let's try to find those missing values with an easy way
# Inspiration came from Susan and 
#https://www.r-bloggers.com/r-sorting-a-data-frame-by-the-contents-of-a-column/
#https://www.rdocumentation.org/packages/base/versions/3.6.0/topics/t 

comparisons <- comparisons %>% rename(ccf_2019 = ccf, rough_cor_2019 = rough_cor, lag_2019 = lag, D_2019 = D, sd_D_2019 = sd_D, signature_length_2019 = signature_length, overlap_2019 = overlap, matches_2019 = matches, mismatches_2019 = mismatches, cms_2019 = cms, non_cms_2019 = non_cms, sum_peaks_2019 = sum_peaks)

comparisons_check <- comparisons %>% 
  select(Bullet1, Bullet2) #Remove Columns we wont be using

features_2017_check <- features_2017 %>% 
  select(Bullet1, Bullet2) # Remove Columns we wont be using

finder <- t(apply(comparisons_check,1,sort))
finder <- data.frame(finder)

#Take Transpose and Sort in order... Now we have:
#AB   => AB
#BA   => AB 

finder <- finder[!duplicated(finder),] # Remove duplicates 

finder <- finder %>% 
  rename(Bullet2 = X2, Bullet1 = X1) %>% 
    select(Bullet1, Bullet2) 
      
finder <- anti_join(finder, features_2017_check, by = c("Bullet2")) # Shows what data is missing and if you add Bullet1 to the anti_join you can see all thee missing Unknown|Unknown Comparisons 
## Warning: Column `Bullet2` joining factor and character vector, coercing
## into character vector
head(finder, 10)
##            Bullet1              Bullet2
## 1  Hamby173-10-1-1      Hamby173-10-1-1
## 2  Hamby173-10-1-1       Hamby173-3-2-1
## 3  Hamby173-10-1-1       Hamby173-4-2-1
## 4  Hamby173-10-1-1 Hamby173-Unknown-M-4
## 5  Hamby173-10-1-2       Hamby173-3-2-1
## 6  Hamby173-10-1-2       Hamby173-4-2-1
## 7  Hamby173-10-1-2 Hamby173-Unknown-M-4
## 8  Hamby173-10-1-3       Hamby173-3-2-1
## 9  Hamby173-10-1-3       Hamby173-4-2-1
## 10 Hamby173-10-1-3 Hamby173-Unknown-M-4

Creating Data Frames for feature comparisons

#Better version of above method I think, using dplyr

comparisons %>% group_by(Bullet1, Bullet2, ccf_2019) %>% filter(Bullet1 == Bullet2) %>% summarise()
## # A tibble: 420 x 3
## # Groups:   Bullet1, Bullet2 [420]
##    Bullet1         Bullet2         ccf_2019
##    <chr>           <chr>              <dbl>
##  1 Hamby173-10-1-1 Hamby173-10-1-1        1
##  2 Hamby173-10-1-2 Hamby173-10-1-2        1
##  3 Hamby173-10-1-3 Hamby173-10-1-3        1
##  4 Hamby173-10-1-4 Hamby173-10-1-4        1
##  5 Hamby173-10-1-5 Hamby173-10-1-5        1
##  6 Hamby173-10-1-6 Hamby173-10-1-6        1
##  7 Hamby173-10-2-1 Hamby173-10-2-1        1
##  8 Hamby173-10-2-2 Hamby173-10-2-2        1
##  9 Hamby173-10-2-3 Hamby173-10-2-3        1
## 10 Hamby173-10-2-4 Hamby173-10-2-4        1
## # … with 410 more rows
comparisons_for_join <- comparisons %>% 
  filter(ccf_2019 != 1) %>% 
    rowwise() %>%
      mutate(sorter = paste(sort(c(Bullet1, Bullet2)), collapse = "-")) %>% 
        distinct(sorter, .keep_all = T) %>% select(-sorter)

#Assign Comparisons
#Every Row
# create col "sorter", which pastes both cols and sorts in order
# remove all rows with duplicate values in sorter
# no need for column 

comparisons_for_join <- na.omit(comparisons_for_join)# Remove Na
features_2017 <- na.omit(features_2017) # Remove Na

Joined_df <- inner_join(comparisons_for_join, features_2017, by = c("Bullet1", "Bullet2")) #ScatterPlot Comparisons for profile_id version 1

ScatterPlots

Joined_df %>% ggplot(aes(x = ccf_2019, y = ccf_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"),show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Cross Correlation")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("ScatterPlot_6.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = rough_cor_2019, y = rough_cor_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Rough Correlation from two (or more) Aligned Signatures")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot7.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = lag_2019, y = lag_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"),show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Lag from two (or more) Aligned Signatures")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot8.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = D_2019, y = D_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Average Distance between two (or more) Aligned Signatures")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot9.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = sd_D_2019, y = sd_D_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Variation in the Height Measurements between two Aligned Signatures")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot10.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = signature_length_2019, y = signature_length_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("signature_length_2019 Vs signature_length_2017")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot11.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = cms_2019, y = cms_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Consecutively Matching Striation Marks")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot12.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = non_cms_2019, y = non_cms_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Consecutively Non-Matching Striation Marks")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot13.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
#------------------------------------------------------------------###
Joined_df %>% ggplot(aes(x = sum_peaks_2019, y = sum_peaks_2017))+
  geom_bin2d(aes(fill = match), bins = 65)+
  geom_smooth(method = lm)+
      geom_smooth(aes(colour = "red"), show.legend = FALSE)+
        scale_fill_manual(values = c("#cab2d6","#fdbf6f"))+
          theme_bw()+
            xlab("2019") + ylab("2017")+
              ggtitle("Combined Height of Aligned Striae between two Aligned Signatures")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

ggsave("Scatter_Plot14.png")
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Looking at the scatterplots we can make some observations but before that, let me go over some details. Two geom_smooth() layers were added. The blue line represents a lm method and the red line represents the gam method. Looking at the graphs where each feature is compared with its other version, we see there is no clear linear relationship between x and y. Over plotting is very bad in this case. Though, the geom_smooth() methods “lm” and “gam” help a lot. These two lines make it easier to see what sort of relationship is going on.

Hamby_Data_Long_by_YEAR <- Joined_df %>% 
  select(1:2, match, ccf_2019:sum_peaks_2019, ccf_2017:sum_peaks_2017) %>% 
    gather(key="measure", value="value", ccf_2019:sum_peaks_2017) %>%   
      mutate(Year = if_else(grepl("^.+(2017)$", measure), 2017, 2019),
             measure = str_remove(measure, "(_2017|_2019)?$"))
        
Hamby_Data_Long_by_YEAR %>%
    ggplot(aes(value, fill = match))+
      geom_density(position = "identity", alpha = 0.50)+
        facet_wrap(~Year+measure, nrow = 2, scales = "free")+
          scale_fill_brewer(palette = "Paired") + theme_bw()+
            ggtitle("Marginal Density Plots")

ggsave("Density_Plot.png", width = 15, height = 7)
#Statistical Summary

Joined_df %>% select(ccf_2019:sum_peaks_2019, ccf_2017:sum_peaks_2017) %>% summary()
##     ccf_2019       rough_cor_2019       lag_2019           D_2019         
##  Min.   :0.06965   Min.   :0.06965   Min.   :-0.7380   Min.   :0.0003402  
##  1st Qu.:0.30370   1st Qu.:0.30370   1st Qu.:-0.2190   1st Qu.:0.0021148  
##  Median :0.36924   Median :0.36924   Median :-0.0380   Median :0.0024983  
##  Mean   :0.38126   Mean   :0.38126   Mean   :-0.0446   Mean   :0.0026513  
##  3rd Qu.:0.44555   3rd Qu.:0.44555   3rd Qu.: 0.1340   3rd Qu.:0.0030529  
##  Max.   :0.97861   Max.   :0.97861   Max.   : 0.3400   Max.   :0.0110021  
##    sd_D_2019        signature_length_2019  overlap_2019  matches_2019   
##  Min.   :0.001166   Min.   :1.448         Min.   :1     Min.   : 0.000  
##  1st Qu.:0.003685   1st Qu.:1.983         1st Qu.:1     1st Qu.: 0.887  
##  Median :0.004331   Median :2.123         Median :1     Median : 1.488  
##  Mean   :0.004483   Mean   :2.128         Mean   :1     Mean   : 1.688  
##  3rd Qu.:0.005130   3rd Qu.:2.264         3rd Qu.:1     3rd Qu.: 2.314  
##  Max.   :0.013486   Max.   :2.800         Max.   :1     Max.   :13.093  
##  mismatches_2019     cms_2019        non_cms_2019    sum_peaks_2019  
##  Min.   : 0.000   Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 9.046   1st Qu.: 0.4878   1st Qu.: 4.491   1st Qu.: 1.385  
##  Median :10.695   Median : 0.9503   Median : 5.869   Median : 2.539  
##  Mean   :10.725   Mean   : 1.1020   Mean   : 6.314   Mean   : 2.843  
##  3rd Qu.:12.373   3rd Qu.: 1.4623   3rd Qu.: 7.671   3rd Qu.: 3.930  
##  Max.   :20.275   Max.   :13.0933   Max.   :20.275   Max.   :22.514  
##     ccf_2017       rough_cor_2017        lag_2017        
##  Min.   :0.01429   Min.   :-0.86225   Min.   :-0.643750  
##  1st Qu.:0.21401   1st Qu.:-0.18413   1st Qu.:-0.117188  
##  Median :0.27648   Median :-0.05534   Median : 0.004687  
##  Mean   :0.29088   Mean   :-0.07194   Mean   : 0.006502  
##  3rd Qu.:0.35039   3rd Qu.: 0.05092   3rd Qu.: 0.131250  
##  Max.   :0.98110   Max.   : 0.96355   Max.   : 0.856250  
##      D_2017           sd_D_2017      signature_length_2017
##  Min.   : 0.05055   Min.   :0.8129   Min.   :1.705        
##  1st Qu.: 2.00260   1st Qu.:2.3375   1st Qu.:1.908        
##  Median : 2.73279   Median :2.7141   Median :1.923        
##  Mean   : 3.26170   Mean   :2.8010   Mean   :1.916        
##  3rd Qu.: 4.04116   3rd Qu.:3.1957   3rd Qu.:1.936        
##  Max.   :18.44221   Max.   :6.8780   Max.   :2.248        
##   overlap_2017     matches_2017    mismatches_2017     cms_2017     
##  Min.   :0.6500   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:0.8976   1st Qu.: 1.209   1st Qu.: 8.029   1st Qu.: 0.592  
##  Median :0.9398   Median : 2.232   Median : 9.499   Median : 1.140  
##  Mean   :0.9342   Mean   : 2.460   Mean   : 9.758   Mean   : 1.363  
##  3rd Qu.:0.9745   3rd Qu.: 3.299   3rd Qu.:11.218   3rd Qu.: 1.713  
##  Max.   :1.0000   Max.   :18.656   Max.   :33.684   Max.   :15.213  
##   non_cms_2017    sum_peaks_2017  
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 3.676   1st Qu.: 1.624  
##  Median : 4.844   Median : 2.772  
##  Mean   : 5.356   Mean   : 3.031  
##  3rd Qu.: 6.542   3rd Qu.: 4.116  
##  Max.   :33.684   Max.   :20.908

Bullet Scores

Currently on hold

comparisons$rfscore <- predict(rtrees, newdata = comparisons, type = "prob")[,2]
head(comparisons, 500)

Bullet_Scores <- comparisons %>% 
  group_by(BulletA, BulletB) %>% 
    tidyr::nest()

Bullet_Scores <- Bullet_Scores %>% 
  mutate(Bullet_Score = data %>% 
          future_map_dbl(.f = function(d) max(compute_average_scores(land1 = d$LandA, land2 = d$LandB, d$rfscore))))

Bullet_Scores %>% 
  select(-data) %>% 
    arrange(desc(Bullet_Score))


Bullet_Scores <- Bullet_Scores %>% 
  mutate(data = data %>% 
           future_map(.f = function(d){
              d$samepath = bullet_to_land_predict(land1 = d$LandA, land2 = d$LandB, d$rfscore, difference = 0.1)
  d
}))


Bullet_Scores %>% 
  tidyr::unnest(data) 
Bullet_Scores_Examin <- Bullet_Scores %>% 
  tidyr::unnest(data)

Bullet_Scores_Examin %>% 
  filter(samepath == "TRUE")


Bullet_Scores_Examin %>% 
  filter(samepath == "FALSE")

Comparing rfscores

Currently on hold

library(gridExtra)

plot1 = comparisons %>%
  filter(BarrelA == 2 & BarrelB == 2) %>% 
    filter(BulletA == 1 & BulletB == 1 ) %>% 
      ggplot(aes(x = LandA, y = LandB, fill = rfscore))+ 
        geom_tile()+ 
          scale_fill_gradient2(low = "#000000", high = "#56B4E9", midpoint = 0.5) +facet_grid(BulletB~BulletA, labeller = "label_both")+ 
            xlab("Land A") + ylab("Land B") + theme(aspect.ratio = 1)+ ggtitle("Same Source and Same Bullet")+ facet_grid(~Set)

plot2 = comparisons %>%
  filter(BarrelA == 2 & BarrelB == 2) %>% 
    filter(BulletA == 1 & BulletB == 2 ) %>% 
      ggplot(aes(x = LandA, y = LandB, fill = rfscore))+ 
        geom_tile()+ 
          scale_fill_gradient2(low = "#000000", high = "#56B4E9", midpoint = 0.5)+
            facet_grid(BulletB~BulletA, labeller = "label_both")+ 
            xlab("Land A") + ylab("Land B") + theme(aspect.ratio = 1)+ 
              ggtitle("Same Source but Different Bullet") + facet_grid(~Set)


plot3 = comparisons %>% 
  filter(BarrelA == 10 & BarrelB == 5) %>% 
    filter(BulletA == 1 & BulletB == 2 ) %>% 
      ggplot(aes(x = LandA, y = LandB, fill = rfscore))+ 
        geom_tile()+ 
          scale_fill_gradient2(low = "#000000", high = "#56B4E9", midpoint = 0.5)+
            facet_grid(BulletB~BulletA, labeller = "label_both")+ 
            xlab("Land A") + ylab("Land B") + theme(aspect.ratio = 1)+
              ggtitle("Different Source") + facet_grid(~Set)

grid.arrange(plot1, plot2, plot3, ncol = 2)